This is just a test of an initial idea.
Imagine a known binary tree that describes the relationship between a set of species. We know the relationship but we do not know the individual strength of the effects. We would like to be able to utilize the information on the relationship between the species for later classification and/or prediction. In the figure below the boxes represent obsered abundances while the circles are latent (unobserved) variables. We assume that we have a total of \(N\) simultaneous measurements of each of the observed variables.
library(mvtnorm)
N <- 100
df <- data.frame(H=rnorm(N),
I=rnorm(N),
J=rnorm(N),
K=rnorm(N),
L=rnorm(N),
M=rnorm(N),
N=rnorm(N),
O=rnorm(N)
)
Sigma <- diag(8)
Sigma[1,2] <- Sigma[2,1] <- .5
Sigma[3,4] <- Sigma[4,3] <- .5
Sigma[5,6] <- Sigma[6,5] <- .5
Sigma[7,8] <- Sigma[8,7] <- .5
mydat <- data.frame(rmvnorm(3, mean=rep(0, 8), Sigma))
Sigma <- diag(4)
Sigma[1,2] <- Sigma[2,1] <- .5
Sigma[3,4] <- Sigma[4,3] <- .5
mydat <- data.frame(rmvnorm(N, mean=rep(0, 4), Sigma))
names(mydat) <- c("D", "E", "F", "G")
head(mydat)
## D E F G
## 1 0.85333598 0.8104226 -0.4877673 0.7180980
## 2 0.60244507 0.7445297 -0.1078784 -0.4226488
## 3 -1.12122919 -1.0012946 -0.2616403 0.1201814
## 4 1.54148198 1.2870652 1.0655272 0.9354132
## 5 0.06851309 -1.4266270 -1.4125591 -0.1234586
## 6 0.67985843 1.7121311 1.5092601 0.6533469
#names(mydat) <- names(df)
head(mydat)
## D E F G
## 1 0.85333598 0.8104226 -0.4877673 0.7180980
## 2 0.60244507 0.7445297 -0.1078784 -0.4226488
## 3 -1.12122919 -1.0012946 -0.2616403 0.1201814
## 4 1.54148198 1.2870652 1.0655272 0.9354132
## 5 0.06851309 -1.4266270 -1.4125591 -0.1234586
## 6 0.67985843 1.7121311 1.5092601 0.6533469
library(sem)
qqqqqmodel.taxa <- specifyModel(text="
A<->A, theta1, NA
A->B, gam1, NA
A->C, gam1, NA
B->D, gam2, NA
B->E, gam2, NA
C->F, gam3, NA
C->G, gam3, NA
D->H, gam4, NA
D->I, gam4, NA
E->J, gam5, NA
E->K, gam5, NA
F->L, gam6, NA
F->M, gam6, NA
G->N, gam7, NA
G->O, gam7, NA
")
## NOTE: it is generally simpler to use specifyEquations() or cfa()
## see ?specifyEquations
## NOTE: adding 14 variances to the model
model.taxa2 <- specifyModel(text="
A<->A, theta1, NA
A->B, gam1, NA
A->C, NA, 1
B->D, NA, 1
B->E, gam1, NA
C->F, NA, 1
C->G, gam1, NA
")
## NOTE: it is generally simpler to use specifyEquations() or cfa()
## see ?specifyEquations
## NOTE: adding 6 variances to the model
EmpVar <- var(mydat)
EmpVar
## D E F G
## D 0.95884763 0.46356679 -0.2114149 -0.09470069
## E 0.46356679 0.89860182 -0.1156579 -0.08704957
## F -0.21141490 -0.11565794 1.0164024 0.61803530
## G -0.09470069 -0.08704957 0.6180353 0.96543561
res <- sem(model.taxa2, S=EmpVar, N=N, maxiter=50, quiet=FALSE)
#EmpVar <- var(df)
#EmpVar
#head(df)
#res <- sem(model.taxa, data=mydat, maxiter=50)
#res <- sem(model.taxa, EmpVar, N=N, maxiter=50)
# str(res)
res
##
## Model Chisquare = 0.3080346 Df = 2
##
## theta1 gam1 V[B] V[C] V[D] V[E]
## -0.4464748 0.4763963 1.0711234 1.7473364 -0.0109707 0.6770224
## V[F] V[G]
## -0.2838383 0.6716702
##
## Iterations = 46
res$C
## D E F G
## D 0.9588238 0.46200645 -0.2126989 -0.10132896
## E 0.4620064 0.89712058 -0.1013290 -0.04827274
## F -0.2126989 -0.10132896 1.0170234 0.61972564
## G -0.1013290 -0.04827274 0.6197256 0.96690523
res$A
## D E F G A B C
## D 0 0 0 0 0.0000000 1.0000000 0.0000000
## E 0 0 0 0 0.0000000 0.4763963 0.0000000
## F 0 0 0 0 0.0000000 0.0000000 1.0000000
## G 0 0 0 0 0.0000000 0.0000000 0.4763963
## A 0 0 0 0 0.0000000 0.0000000 0.0000000
## B 0 0 0 0 0.4763963 0.0000000 0.0000000
## C 0 0 0 0 1.0000000 0.0000000 0.0000000
#sem(model.taxa, data=df)